{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "gTm2dvjVFEtm",
    "papermill": {
     "duration": 0.016127,
     "end_time": "2021-03-23T20:00:44.268199",
     "exception": false,
     "start_time": "2021-03-23T20:00:44.252072",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Causal Identification in DAGs using Backdoor and Swigs, Equivalence Classes, Falsifiability Tests\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_execution_state": "idle",
    "_uuid": "051d70d956493feee0c6d64651c6a088724dca2a",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 504
    },
    "executionInfo": {
     "elapsed": 5192,
     "status": "error",
     "timestamp": 1623785190931,
     "user": {
      "displayName": "Vasilis Syrganis",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjxUYeW9b7fysN1nK65PWILriYGqqfmpUsWgLP7ZA=s64",
      "userId": "00968929417250286436"
     },
     "user_tz": 240
    },
    "id": "vneTlqjYFEtn",
    "outputId": "3d5637d4-7daf-4368-fc48-a3bd03a80235",
    "papermill": {
     "duration": 29.813743,
     "end_time": "2021-03-23T20:01:14.096635",
     "exception": false,
     "start_time": "2021-03-23T20:00:44.282892",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%pip install rpy2\n",
    "%load_ext rpy2.ipython\n",
    "import numpy as np\n",
    "import statsmodels.formula.api as smf\n",
    "import rpy2\n",
    "from rpy2.robjects import r, pandas2ri\n",
    "pandas2ri.activate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%R\n",
    "install.packages(\"dagitty\")\n",
    "install.packages(\"ggdag\")\n",
    "library(dagitty)\n",
    "library(ggdag)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "GyEYPxq1FEtp",
    "papermill": {
     "duration": 0.019562,
     "end_time": "2021-03-23T20:01:14.135443",
     "exception": false,
     "start_time": "2021-03-23T20:01:14.115881",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Graph Generation and Plotting "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "lSn_zcXOFEtp",
    "papermill": {
     "duration": 0.016935,
     "end_time": "2021-03-23T20:01:14.168961",
     "exception": false,
     "start_time": "2021-03-23T20:01:14.152026",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "The following DAG is due to Judea Pearl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 113
    },
    "executionInfo": {
     "elapsed": 210,
     "status": "error",
     "timestamp": 1623785197872,
     "user": {
      "displayName": "Vasilis Syrganis",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjxUYeW9b7fysN1nK65PWILriYGqqfmpUsWgLP7ZA=s64",
      "userId": "00968929417250286436"
     },
     "user_tz": 240
    },
    "id": "sANH-JE3FEtq",
    "outputId": "83c66089-5ef4-4987-a41c-dc91fb33020c",
    "papermill": {
     "duration": 1.087289,
     "end_time": "2021-03-23T20:01:15.272991",
     "exception": false,
     "start_time": "2021-03-23T20:01:14.185702",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "G = dagitty('dag{\n",
    "Z1 [pos=\"-2,-1.5\"]\n",
    "X1 [pos=\"-2,0\"]\n",
    "Z2 [pos=\"1.5,-1.5\"]\n",
    "X3 [pos=\"1.5, 0\"]\n",
    "Y [outcome,pos=\"1.5,1.5\"]\n",
    "D [exposure,pos=\"-2,1.5\"]\n",
    "M [mediator, pos=\"0,1.5\"]\n",
    "X2 [pos=\"0,0\"]\n",
    "Z1 -> X1\n",
    "X1 -> D\n",
    "Z1 -> X2\n",
    "Z2 -> X3\n",
    "X3 -> Y\n",
    "Z2 -> X2\n",
    "D -> Y\n",
    "X2 -> Y\n",
    "X2 -> D\n",
    "M->Y\n",
    "D->M\n",
    "}')\n",
    "\n",
    "\n",
    "ggdag(G)+  theme_dag()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "c8HfEsyGFEtr",
    "papermill": {
     "duration": 0.017889,
     "end_time": "2021-03-23T20:01:15.310039",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.292150",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Report Relatives of X2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jIvKeikGFEtr",
    "outputId": "a38d5ee8-892f-42f2-c50a-aa246f4e769f",
    "papermill": {
     "duration": 0.07305,
     "end_time": "2021-03-23T20:01:15.400861",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.327811",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "print(parents(G, \"X2\"))\n",
    "print(children(G, \"X2\"))\n",
    "print(ancestors(G, \"X2\"))\n",
    "print(descendants(G, \"X2\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "HomeVbGRFEts",
    "papermill": {
     "duration": 0.019171,
     "end_time": "2021-03-23T20:01:15.439605",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.420434",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Find Paths Between D and Y\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "lC1jC5w1FEts",
    "outputId": "6cc1d64b-1038-4aee-ab17-2a584b2274cc",
    "papermill": {
     "duration": 0.088653,
     "end_time": "2021-03-23T20:01:15.548445",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.459792",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "paths(G, \"D\", \"Y\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "bSY1NSb8FEtt",
    "papermill": {
     "duration": 0.020042,
     "end_time": "2021-03-23T20:01:15.590055",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.570013",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# List All Testable Implications of the Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ElKrk7s1FEtt",
    "outputId": "542ead69-a310-47ff-e8cd-28715e53d8bd",
    "papermill": {
     "duration": 0.078982,
     "end_time": "2021-03-23T20:01:15.689227",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.610245",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "print(impliedConditionalIndependencies(G))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "HrmgWYz5FEtu",
    "papermill": {
     "duration": 0.022026,
     "end_time": "2021-03-23T20:01:15.733010",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.710984",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Identification by Backdoor: List minimal adjustment sets to identify causal effecs $D \\to Y$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "EtAH1GjNFEtu",
    "outputId": "39d24a27-edba-4885-f865-5cf98b7e82f1",
    "papermill": {
     "duration": 0.06772,
     "end_time": "2021-03-23T20:01:15.822837",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.755117",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "print(adjustmentSets( G, \"D\", \"Y\" ))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "S2AvUu-PFEtu",
    "papermill": {
     "duration": 0.021844,
     "end_time": "2021-03-23T20:01:15.866470",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.844626",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Identification via SWIG and D-separation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Q5OYVKv9FEtu",
    "outputId": "2aaf0013-09a4-423d-f7c7-7546966e9d75",
    "papermill": {
     "duration": 0.401164,
     "end_time": "2021-03-23T20:01:16.289342",
     "exception": false,
     "start_time": "2021-03-23T20:01:15.888178",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "SWIG = dagitty('dag{\n",
    "Z1 [pos=\"-2,-1.5\"]\n",
    "X1 [pos=\"-2,0\"]\n",
    "Z2 [pos=\"1.5,-1.5\"]\n",
    "X3 [pos=\"1.5, 0\"]\n",
    "Yd [outcome,pos=\"1.5,1.5\"]\n",
    "D [exposure,pos=\"-2,1.5\"]\n",
    "d [pos=\"-1, 1.5\"]\n",
    "Md [mediator, pos=\"0,1.5\"]\n",
    "X2 [pos=\"0,0\"]\n",
    "Z1 -> X1\n",
    "X1 -> D\n",
    "Z1 -> X2\n",
    "Z2 -> X3\n",
    "X3 -> Yd\n",
    "Z2 -> X2\n",
    "X2 -> Yd\n",
    "X2 -> D\n",
    "X3-> Yd\n",
    "Md-> Yd\n",
    "d-> Md\n",
    "}')\n",
    "\n",
    "ggdag(SWIG)+  theme_dag()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "zHIQ7FBdFEtv",
    "papermill": {
     "duration": 0.022959,
     "end_time": "2021-03-23T20:01:16.335873",
     "exception": false,
     "start_time": "2021-03-23T20:01:16.312914",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "\n",
    "# Deduce Conditional Exogeneity or Ignorability by D-separation\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "AmZdsN1MFEtv",
    "outputId": "39bcc6d8-409d-4eef-d801-02ee4deee817",
    "papermill": {
     "duration": 0.078242,
     "end_time": "2021-03-23T20:01:16.437314",
     "exception": false,
     "start_time": "2021-03-23T20:01:16.359072",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "print(impliedConditionalIndependencies(SWIG)[5:8])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "6CyMk3m_FEtw",
    "papermill": {
     "duration": 0.02359,
     "end_time": "2021-03-23T20:01:16.484480",
     "exception": false,
     "start_time": "2021-03-23T20:01:16.460890",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "This coincides with the backdoor criterion for this graph."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "SHOaE9wrFEtw",
    "papermill": {
     "duration": 0.024341,
     "end_time": "2021-03-23T20:01:16.532418",
     "exception": false,
     "start_time": "2021-03-23T20:01:16.508077",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Print All Average Effects Identifiable by Conditioning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "odKgt4EkFEtw",
    "outputId": "8f96854c-fb1a-408b-9eec-b496925445b7",
    "papermill": {
     "duration": 0.276963,
     "end_time": "2021-03-23T20:01:16.833583",
     "exception": false,
     "start_time": "2021-03-23T20:01:16.556620",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R \n",
    "for( n in names(G) ){\n",
    "    for( m in children(G,n) ){\n",
    "        a <- adjustmentSets( G, n, m )\n",
    "        if( length(a) > 0 ){\n",
    "            cat(\"The effect \",n,\"->\",m,\n",
    "                \" is identifiable by controlling for:\\n\",sep=\"\")\n",
    "            print( a, prefix=\" * \" )\n",
    "        }\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "dPh0Ogm0FEtw",
    "papermill": {
     "duration": 0.023994,
     "end_time": "2021-03-23T20:01:16.884314",
     "exception": false,
     "start_time": "2021-03-23T20:01:16.860320",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Equivalence Classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "fKfhqfpRFEtw",
    "outputId": "ded82bbd-228a-412f-ed2d-c28ee82ce3ef",
    "papermill": {
     "duration": 0.12902,
     "end_time": "2021-03-23T20:01:17.037348",
     "exception": false,
     "start_time": "2021-03-23T20:01:16.908328",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "P=equivalenceClass(G)\n",
    "plot(P)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pzUqthecFEtx",
    "papermill": {
     "duration": 0.025971,
     "end_time": "2021-03-23T20:01:17.089682",
     "exception": false,
     "start_time": "2021-03-23T20:01:17.063711",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "Next Consider the elemntary Triangular Model:\n",
    "$$\n",
    "D \\to Y, \\quad X \\to (D,Y).\n",
    "$$\n",
    "This model has not testable implications and is Markov-equivalent to any other DAG difined on names $(X, D, Y)$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "JWfsjF9vFEtx",
    "outputId": "b9b13405-34d8-460b-c001-e542eef69606",
    "papermill": {
     "duration": 0.534599,
     "end_time": "2021-03-23T20:01:17.651356",
     "exception": false,
     "start_time": "2021-03-23T20:01:17.116757",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "G3<- dagitty('dag{\n",
    "D -> Y\n",
    "X -> D\n",
    "X -> Y\n",
    "}\n",
    "')\n",
    "ggdag(G3)+  theme_dag()\n",
    "print(impliedConditionalIndependencies(G3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0vZQLsAuFEty",
    "outputId": "f1eb40ed-f021-4f64-9dad-4e74d43c3e2e",
    "papermill": {
     "duration": 0.159885,
     "end_time": "2021-03-23T20:01:17.838324",
     "exception": false,
     "start_time": "2021-03-23T20:01:17.678439",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "P=equivalenceClass(G3)\n",
    "plot(P)\n",
    "equivalentDAGs(G3,10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "fCUHAqRdFEty",
    "papermill": {
     "duration": 0.029565,
     "end_time": "2021-03-23T20:01:17.897959",
     "exception": false,
     "start_time": "2021-03-23T20:01:17.868394",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Example of Testing DAG Validity\n",
    "\n",
    "Next we simulate the data from a Linear SEM associated to DAG G, and perform a test of conditional independence restrictions, exploting linearity. \n",
    "\n",
    "\n",
    "There are many other options for nonlinear models and discrete categorical variabales. Type help(localTests). "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "594p7Iz9FEty",
    "outputId": "fa9914cc-5402-499b-b735-7e1687388111",
    "papermill": {
     "duration": 0.231975,
     "end_time": "2021-03-23T20:01:18.159373",
     "exception": false,
     "start_time": "2021-03-23T20:01:17.927398",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "x <- simulateSEM(G)\n",
    "localTests(G, data = x, type = c(\"cis\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Zq6L20RmFEty",
    "papermill": {
     "duration": 0.035006,
     "end_time": "2021-03-23T20:01:18.227478",
     "exception": false,
     "start_time": "2021-03-23T20:01:18.192472",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "Next we replaced $D$ by $\\bar D$ generated differently:\n",
    "$$\n",
    "\\bar D= (D + Y)/2\n",
    "$$\n",
    "So basically $\\bar D$ is an average of $D$ and $Y$ generated by $D$.  We then test if the resulting collection of random variables satisifes conditional indepdendence restrictions, exploiting linearity.  We end up rejectiong these restrictions and thefore the validity of this model for the data generated in this way.  This makes sense, because the new data no longer obeys the previous DAG structure.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "X_BC4vaZFEtz",
    "outputId": "de4fd9a0-610e-415c-b8eb-17302318e8df",
    "papermill": {
     "duration": 0.113142,
     "end_time": "2021-03-23T20:01:18.373034",
     "exception": false,
     "start_time": "2021-03-23T20:01:18.259892",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%R\n",
    "x.R <- x\n",
    "x.R$D <- (x$D+ x$Y)/2\n",
    "localTests(G, data = x.R, type = c(\"cis\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "name": "notebook-dagitty.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.7"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 37.733589,
   "end_time": "2021-03-23T20:01:18.516130",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2021-03-23T20:00:40.782541",
   "version": "2.2.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
